The following code digs into the chronological clustering of both the community size spectrum slopes, and the mean sizes across all years of the NMFS groundfish survey.
Data is prepared and updated using {targets} to ensure a consistent data state and a reproducible workflow.
Target Data
Data for the report comes directly from the {targets} workflow found in _targets.R.
#### Data ####
#### Size Spectrum Targets
# SS and manual bins together
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(size_spectrum_indices))
#### OISST Data
# OISST for all the regions
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(regional_oisst))
#### Size Data Targets
# 1. Biological data used as input
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(nefsc_bio_lw)) # Biological Data for sizes
# 2. Mean sizes grouped on - yr, season, survey area, taxon group, fishery status
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_individual_sizes))
# 3. Mean sizes using same groupings as the size spectrum indices
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_sizes_ss_groups))
Community size spectrum slopes were estimated using 2 methods for comparison, using a minimum individual biomass of 1 gram, and using area stratified abundances.
The first was using the methods of the {sizeSpectra} package which were shown to be the most accurate when using simulated data.
The second method uses binned abundances, with bins of width 0.5 on a log10 scale, so 10^0 - 10^0.5 etc. These bins were then normalized by dividing the abundances by the bin width to account for the increasing bin size.
Results from Both Methods
# Pull the group ID for the slopes grouped on year, season, and region
warmem_group_slopes <- size_spectrum_indices %>%
filter(`group ID`== "single years * season * region") %>%
mutate(season = fct_rev(season),
survey_area = factor(survey_area,
levels = c("GoM", "GB", "SNE", "MAB")),
yr = as.numeric(as.character(Year)))
The Edwards methodology differs from the other methods for estimating size spectra by avoiding the subjective decisions around how to bin data prior to fitting the log-linear regression.
Instead, Edwards uses the individual size distributions (how many individuals of any given length/weight). Abundances are totaled into discrete size bins based on expected biomass at length and length + 1, and individuals that fall within those bins are totaled to get abundances across a continuous distribution of individual bodymass.
The next difference is that the individual body size data is presumed to follow a bounded power-law distribution, with a minimum and maximum body size. Using the individual size data, maximum likelihood estimation is used to solve for the parameter (b) that minimizes the negative log-likelihood.
Long-Term Patterns
# Plot the sizeSpectra slopes
(ss_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, b, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - Edwards Method")
)
Chronological Clustering
# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, b)
# Pivot wider
ss_dat <- cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = b,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
# Get Euclidean Distances
eucdist <- vegdist(in_dat,
method = "euclidean",
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = TRUE)
# Perform Chronological Clustering on distances
cl <- chclust(eucdist, method = "coniss")
# Return list
return(list(eucdist = eucdist, cl = cl))
}
# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)
Broomstick Plot
# broken stick model
vegan::bstick(ss_chron$cl, plot=T)
Dendrogram
plot(ss_chron$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
ss_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2004.5), color = "gray40", linetype = 2, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1980.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1979.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1973.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2007.5), color = "gray50", linetype = 3, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
Long-Term Patterns
# plot trends of log10 slopes
(l10_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_slope_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Chronological Clustering
# Reshaping
# Goal: Row for Years, columns for each different group
l10_cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_slope_strat)
# Pivot wider
l10_dat <- l10_cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_slope_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run clustering
l10_clust <- run_chron_clust(in_dat = l10_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_clust$cl, plot=T)
Dendrogram
plot(l10_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Size Spectrum Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
l10_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2001.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray40", linetype = 2, size = 0.7) +
# geom_vline(aes(xintercept = 1997.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1975.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 1973.5), color = "gray50", linetype = 3, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 5 or 8 breaks")
Long-Term Patterns
# plot trends of log10 slopes
(int_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_int_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Intercept",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Chronological Clustering
# Reshaping
# Goal: Row for Years, columns for each different group
l10_intercept_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_int_strat)
# Pivot wider
intercept_dat <- l10_intercept_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_int_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run Clustering
l10_ints <- run_chron_clust(in_dat = intercept_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_ints$cl, plot=T)
Dendrogram
plot(l10_ints$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Bins Intercept - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
# Re-plot patterns with breaks
int_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints")
Body size (length and bodymass) were calculated using data obtained from NOAA that had length and weight data for individual fishes and inverts.
For these metrics there was no area stratification performed. Data was also filtered to species used in the size-spectrum analysis i.e. species from Wigley 2006.
# Pull the group ID for the slopes grouped on year, season, and region
warmem_group_sizes <- mean_sizes_ss_groups %>%
filter(`group ID`== "single years * season * region") %>%
mutate(yr = as.numeric(as.character(Year)),
season = fct_rev(season),
survey_area = factor(survey_area,
levels = c("GoM", "GB", "SNE", "MAB")),
yr = as.numeric(as.character(Year)))
Long-Term Patterns
# plot trends of log10 slopes
(len_patterns <- warmem_group_sizes %>%
ggplot(aes(yr, mean_len_cm, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Mean Length (cm)",
color = "",
title = "All ")
)
Chronological Clustering
# Pull Lengths
mean_lengths <- warmem_group_sizes %>%
select(Year, season, survey_area, mean_len_cm)
# Pivot Wider
len_dat <- mean_lengths %>%
pivot_wider(names_from = c(season, survey_area),
values_from = mean_len_cm,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run Clustering
len_clust <- run_chron_clust(in_dat = len_dat)
Broomstick Plot
# broken stick model
vegan::bstick(len_clust$cl, plot=T)
Dendrogram
plot(len_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Mean Individual Length - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
# Re-plot patterns with breaks
len_patterns +
geom_vline(aes(xintercept = 2008.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1992.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1991.5), color = "gray50", linetype = 2, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
# geom_vline(aes(xintercept = 2001.5), color = "gray50", linetype = 3, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
Long-Term Patterns
# plot trends of log10 slopes
(wt_patterns <- warmem_group_sizes %>%
ggplot(aes(yr, mean_wt_kg, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(season~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Mean Weight (kg)",
color = "",
title = "All ")
)
Chronological Clustering
# Pull Lengths
mean_wts <- warmem_group_sizes %>%
select(Year, season, survey_area, mean_wt_kg)
# Pivot Wider
wt_dat <- mean_wts %>%
pivot_wider(names_from = c(season, survey_area),
values_from = mean_wt_kg,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run Clustering
wt_clust <- run_chron_clust(in_dat = wt_dat)
Broomstick Plot
# broken stick model
vegan::bstick(wt_clust$cl, plot=T)
Dendrogram
plot(wt_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Mean Individual Weight - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
# Re-plot patterns with breaks
wt_patterns +
geom_vline(aes(xintercept = 2008.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1992.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
# Pull Weights
mean_weights <- warmem_group_sizes %>%
select(Year, season, survey_area, mean_wt_kg)
Long-Term Patterns
# plot trends of log10 slopes
(sst_patterns <- regional_oisst %>%
ggplot(aes(yr, sst_anom, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(~survey_area) +
scale_color_gmri() +
labs(x = "",
y = "Mean Temperature Anomaly",
color = "",
title = "")
)
Chronological Clustering
# Pivot OISST Wider
oisst_dat <- regional_oisst %>%
select(yr, survey_area, sst_anom) %>%
pivot_wider(names_from = survey_area, values_from = sst_anom) %>%
column_to_rownames(var = "yr")
# Run clustering
sst_clust <- run_chron_clust(oisst_dat)
Broomstick Plot
# broken stick model
vegan::bstick(sst_clust$cl, plot=T)
Dendrogram
plot(sst_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("SST Anomalies - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
Slopes with Breakpoints
# Re-plot patterns with breaks
sst_patterns +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1998.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2005.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints - 3 or 8")
A work by Adam A. Kemberling
Akemberling@gmri.org